Washington D.C. Crime

DSAN 5200 Final Project

Authors
Affiliation

Brian Kwon

Georgetown University

Powell Sheagren

Georgetown University

Dheeraj Oruganty

Georgetown University

Published

April 29, 2024

Introduction

Intro

View 1

Code
library(rvest)
library(tidyverse)
library(plotly)

# Download the Wikipedia page of US cities by crime rate and take the first
# HTML table on it as the raw data set
url <- "https://en.wikipedia.org/wiki/List_of_United_States_cities_by_crime_rate"
page <- read_html(url)
tables <- html_table(page, fill = TRUE)
crime_data <- tables[[1]]

# The second row of the scraped table is used as the header; the first two
# rows are then dropped
colnames(crime_data) <- crime_data[2, ]
crime_data <- crime_data[-(1:2), ] %>%
  select(1:4) # Keep only the first four columns

# Give the remaining columns stable names
colnames(crime_data) <- c("state", "city", "population", "crime_rate")

# Convert the text columns to numbers (population uses "," as a thousands mark)
crime_data <- crime_data %>%
  mutate(
    population = as.numeric(gsub(",", "", population)),
    crime_rate = as.numeric(crime_rate)
  )

# Keep only the most populous city of each state.
# ungroup() is called right away so the per-state grouping does not leak into
# the column edits and joins that follow.
crime_data <- crime_data %>%
  group_by(state) %>%
  slice(which.max(population)) %>%
  ungroup()

# Strip trailing digits (footnote markers) from the city and state names
crime_data$city <- gsub("\\d+$", "", crime_data$city)
crime_data$state <- gsub("\\d+$", "", crime_data$state)

# Rename two cities by hand so they match the spelling used when merging
# on the city name
crime_data <- crime_data %>%
  mutate(
    city = if_else(city == "Washington, D.C.", "Washington", city),
    city = if_else(city == "Louisville Metro", "Louisville", city)
  )

# Load the city coordinates: columns 1 and 4 plus lat and lng
location <- read.csv("./data/uscities.csv") %>%
  select(1, 4, lat, lng)

# Join crime figures with coordinates on the city name, then keep only the
# rows whose state matches as well (city names repeat across states)
df <- merge(crime_data, location, by = "city") %>%
  filter(state == state_name) %>%
  select(-state_name)

# Bubble map: one marker per city, sized by population and colored by crime
# rate; the hover text lists state, city, crime rate, and population
map <- df %>%
  plot_geo(lat = ~lat, lon = ~lng) %>%
  add_markers(
    text = ~paste("State: ", state, "<br>City: ", city, "<br>Crime Rate: ", crime_rate, "<br>Population: ", population),
    size = ~population,
    color = ~crime_rate,
    marker = list(sizemode = "area", sizeref = 0.2)
  ) %>%
  colorbar(title = "Crime Rate") %>%
  layout(
    title = "Crime Rate Bubble Map for US cities in 2019",
    geo = list(scope = "usa")
  )
map

Fig 1: Marker size represents the city's population, and marker color represents its crime rate.

View 2

Code
library(tidyverse)
library(DT)

# Offense-type lookup tables. The 2022 file is keyed by "offense_code" and is
# used for 2021-2022; the 2018 file is keyed by "OFFENSE_TYPE_ID" and is used
# for 2018-2020.
offense_code1 <- read.csv("./data/DC-2022/NIBRS_OFFENSE_TYPE.csv")
offense_code2 <- read.csv("./data/DC-2018/NIBRS_OFFENSE_TYPE.csv")

# Read each year's offenses and attach the offense-type details
offense_22 <- read.csv("./data/DC-2022/NIBRS_OFFENSE.csv") %>%
  merge(offense_code1, by = "offense_code")
offense_21 <- read.csv("./data/DC-2021/NIBRS_OFFENSE.csv") %>%
  merge(offense_code1, by = "offense_code")
offense_20 <- read.csv("./data/DC-2020/NIBRS_OFFENSE.csv") %>%
  merge(offense_code2, by = "OFFENSE_TYPE_ID")
offense_19 <- read.csv("./data/DC-2019/NIBRS_OFFENSE.csv") %>%
  merge(offense_code2, by = "OFFENSE_TYPE_ID")
offense_18 <- read.csv("./data/DC-2018/NIBRS_OFFENSE.csv") %>%
  merge(offense_code2, by = "OFFENSE_TYPE_ID")

# Percentage share (rounded to 2 decimals) of each offense name among all rows
# of one year's offense table. `name_col` is the offense-name column, whose
# capitalization differs between years.
offense_share <- function(offenses, name_col) {
  shares <- table(offenses[[name_col]]) / nrow(offenses) * 100
  as.data.frame(round(shares, 2))
}

offense_22_count <- offense_share(offense_22, "offense_name")
offense_21_count <- offense_share(offense_21, "offense_name")
offense_20_count <- offense_share(offense_20, "OFFENSE_NAME")
offense_19_count <- offense_share(offense_19, "OFFENSE_NAME")
offense_18_count <- offense_share(offense_18, "OFFENSE_NAME")

# Full outer join of the yearly tables on the offense name, oldest year first
offense_df <- Reduce(
  function(left, right) merge(left, right, by = "Var1", all = TRUE),
  list(
    offense_18_count,
    offense_19_count,
    offense_20_count,
    offense_21_count,
    offense_22_count
  )
)
colnames(offense_df) <- c("Offense Type", "2018", "2019", "2020", "2021", "2022")

# Interactive table with a filter row above the columns
datatable(
  data = offense_df,
  caption = "Table",
  filter = "top"
)

Fig 2: Percentage share of each offense type among all recorded offenses, by year (2018–2022).

View 3

Code
library(tidyverse)
library(plotly)
library(igraph)
library(GGally)
library(network)

# For every year: read the location-type and offense-type lookups, then read
# the offenses, tag them with the year, and attach both lookups.
# The 2018-2020 files use upper-case key columns; 2021-2022 use lower-case
# keys and join offense types on "offense_code".

location_2018 <- read.csv("data/DC-2018/NIBRS_LOCATION_TYPE.csv")
offense_2018 <- read.csv("data/DC-2018/NIBRS_OFFENSE_TYPE.csv")
offense_data_2018 <- read.csv("data/DC-2018/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2018) %>%
  left_join(location_2018, by = "LOCATION_ID") %>%
  left_join(offense_2018, by = "OFFENSE_TYPE_ID")

location_2019 <- read.csv("data/DC-2019/NIBRS_LOCATION_TYPE.csv")
offense_2019 <- read.csv("data/DC-2019/NIBRS_OFFENSE_TYPE.csv")
offense_data_2019 <- read.csv("data/DC-2019/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2019) %>%
  left_join(location_2019, by = "LOCATION_ID") %>%
  left_join(offense_2019, by = "OFFENSE_TYPE_ID")

location_2020 <- read.csv("data/DC-2020/NIBRS_LOCATION_TYPE.csv")
offense_2020 <- read.csv("data/DC-2020/NIBRS_OFFENSE_TYPE.csv")
offense_data_2020 <- read.csv("data/DC-2020/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2020) %>%
  left_join(location_2020, by = "LOCATION_ID") %>%
  left_join(offense_2020, by = "OFFENSE_TYPE_ID")

location_2021 <- read.csv("data/DC-2021/NIBRS_LOCATION_TYPE.csv")
offense_2021 <- read.csv("data/DC-2021/NIBRS_OFFENSE_TYPE.csv")
offense_data_2021 <- read.csv("data/DC-2021/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2021) %>%
  left_join(location_2021, by = "location_id") %>%
  left_join(offense_2021, by = "offense_code")

location_2022 <- read.csv("data/DC-2022/NIBRS_LOCATION_TYPE.csv")
offense_2022 <- read.csv("data/DC-2022/NIBRS_OFFENSE_TYPE.csv")
offense_data_2022 <- read.csv("data/DC-2022/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2022) %>%
  left_join(location_2022, by = "location_id") %>%
  left_join(offense_2022, by = "offense_code")

# Stack 2018-2020, lower-case the column names, and drop the offense key
# columns
offense_data_bundle1 <- rbind(offense_data_2018, offense_data_2019, offense_data_2020)
colnames(offense_data_bundle1) <- tolower(colnames(offense_data_bundle1))
offense_data_bundle1 <- offense_data_bundle1 %>%
  select(-c(offense_type_id, offense_code))

# Stack 2021-2022 (already lower-case) and drop the offense code
offense_data_bundle2 <- rbind(offense_data_2021, offense_data_2022) %>%
  select(-c(offense_code))

offense_data_total <- rbind(offense_data_bundle1, offense_data_bundle2)

# Keep only the 15 most frequent offense names
top_offenses <- names(head(sort(table(offense_data_total$offense_name), decreasing = TRUE), 15))
offense_data_total <- offense_data_total %>%
  filter(offense_name %in% top_offenses)

# Rows belonging to incidents that appear more than once, i.e. incidents with
# several recorded offenses
repeated_ids <- offense_data_total$incident_id[duplicated(offense_data_total$incident_id)]
duplicated_data <- offense_data_total[offense_data_total$incident_id %in% repeated_ids, ]

# Distinct ids of those incidents, in order of first appearance
num_incidents <- unique(duplicated_data$incident_id)

# Build the edge list of offenses that co-occur within an incident: for every
# incident in `num_incidents`, emit one (source, target) row per pair of its
# offense rows, where the source row precedes the target row.
#
# The pairs are produced per incident with combn() and bound together once,
# rather than growing a data frame with rbind() inside nested loops, which
# copies the whole table on every append.

# NOTE(review): the offense label is read by position (column 11), as before;
# presumably this is offense_name -- confirm against the joined table.
offense_col <- 11

# Offense labels grouped by incident; the factor levels keep the incidents in
# the order of `num_incidents`, and row order within an incident is preserved
offenses_by_incident <- split(
  as.character(duplicated_data[[offense_col]]),
  factor(duplicated_data$incident_id, levels = num_incidents)
)

# A pair needs at least two offenses
offenses_by_incident <- Filter(
  function(offenses) length(offenses) >= 2,
  offenses_by_incident
)

# combn(x, 2) lists pairs as (1,2), (1,3), ..., (2,3), ... -- the same order
# as a nested loop over g < f
edge_chunks <- lapply(
  offenses_by_incident,
  function(offenses) t(combn(offenses, 2))
)

edge_matrix <- if (length(edge_chunks) > 0) {
  do.call(rbind, unname(edge_chunks))
} else {
  # No multi-offense incidents: return an empty two-column edge list
  matrix(character(0), ncol = 2)
}

adjacency_list <- as.data.frame(edge_matrix, stringsAsFactors = FALSE)
colnames(adjacency_list) <- c("source", "target")

# Turn the edge list into an adjacency matrix of offense co-occurrences.
# graph_from_edgelist() / as_adjacency_matrix() are the current igraph names
# for the deprecated graph.edgelist() / get.adjacency() aliases.
adjacency_matrix <- as.matrix(adjacency_list)
adjacency_matrix <- igraph::graph_from_edgelist(adjacency_matrix) %>%
  igraph::as_adjacency_matrix() %>%
  as.matrix()

# Piping into as.data.frame() names the resulting column "."; the join and
# the `crime_stats$.` lookups below rely on that name
crime_names <- rownames(adjacency_matrix) %>% as.data.frame()

# Frequency of each offense name, standardized to z-scores
offense_freq <- offense_data_total$offense_name %>% table() %>% as.data.frame()
offense_freq$Freq <- (offense_freq$Freq - mean(offense_freq$Freq)) / sd(offense_freq$Freq)

# Attach the standardized frequency to each vertex, keeping the vertex order
# of the adjacency matrix; the join key is spelled out instead of inferred
crime_stats <- left_join(crime_names, offense_freq, by = ".")

net <- network(adjacency_matrix, directed = FALSE)

# Vertex attributes used for color, size, and labelling
net %v% "Crime" <- crime_stats$.
net %v% "Freq" <- crime_stats$Freq
net %v% "Label" <- crime_stats$.

net <- set.edge.value(net, "Weight", adjacency_matrix)

# Circular layout: node size from the standardized frequency, one color per
# offense; the legend is hidden
p <- ggnet2(net,
            mode = "circle",
            node.size = "Freq",
            node.color = "Crime",
            edge.size = 0.2,
            edge.color = "grey",
            edge.alpha = .5) +
  theme(legend.position = "none") +
  labs(title = "Relationships between offenses within incidents")

ggplotly(p)

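Fig 3: Network of the 15 most frequent offense types. Edges connect offenses recorded within the same incident, and node size reflects the standardized frequency of each offense.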

View 4

Code
library(tidyverse)

## For each year: offenses get their offense-type details, victim/offender
## relations get their relationship names, victims are right-joined onto the
## relations (one row per recorded relation), and the offenses of the same
## incident and year are attached. Only the relationship name, offense
## category, and year are kept.

## 2018
offense_2018 <- read.csv("data/DC-2018/NIBRS_OFFENSE_TYPE.csv")
offense_data_2018 <- read.csv("data/DC-2018/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2018) %>%
  left_join(offense_2018, by = "OFFENSE_TYPE_ID")

relationship_2018 <- read.csv("data/DC-2018/NIBRS_RELATIONSHIP.csv")
relation_2018 <- read.csv("data/DC-2018/NIBRS_VICTIM_OFFENDER_REL.csv") %>%
  left_join(relationship_2018, by = "RELATIONSHIP_ID")

victim_data_2018 <- read.csv("data/DC-2018/NIBRS_VICTIM.csv") %>%
  mutate(year = 2018) %>%
  right_join(relation_2018, by = "VICTIM_ID")

total_data_2018 <- victim_data_2018 %>%
  left_join(offense_data_2018, by = c("INCIDENT_ID", "year")) %>%
  select(RELATIONSHIP_NAME, OFFENSE_CATEGORY_NAME, year)

## 2019
offense_2019 <- read.csv("data/DC-2019/NIBRS_OFFENSE_TYPE.csv")
offense_data_2019 <- read.csv("data/DC-2019/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2019) %>%
  left_join(offense_2019, by = "OFFENSE_TYPE_ID")

relationship_2019 <- read.csv("data/DC-2019/NIBRS_RELATIONSHIP.csv")
relation_2019 <- read.csv("data/DC-2019/NIBRS_VICTIM_OFFENDER_REL.csv") %>%
  left_join(relationship_2019, by = "RELATIONSHIP_ID")

victim_data_2019 <- read.csv("data/DC-2019/NIBRS_VICTIM.csv") %>%
  mutate(year = 2019) %>%
  right_join(relation_2019, by = "VICTIM_ID")

total_data_2019 <- victim_data_2019 %>%
  left_join(offense_data_2019, by = c("INCIDENT_ID", "year")) %>%
  select(RELATIONSHIP_NAME, OFFENSE_CATEGORY_NAME, year)

## 2020
offense_2020 <- read.csv("data/DC-2020/NIBRS_OFFENSE_TYPE.csv")
offense_data_2020 <- read.csv("data/DC-2020/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2020) %>%
  left_join(offense_2020, by = "OFFENSE_TYPE_ID")

relationship_2020 <- read.csv("data/DC-2020/NIBRS_RELATIONSHIP.csv")
relation_2020 <- read.csv("data/DC-2020/NIBRS_VICTIM_OFFENDER_REL.csv") %>%
  left_join(relationship_2020, by = "RELATIONSHIP_ID")

victim_data_2020 <- read.csv("data/DC-2020/NIBRS_VICTIM.csv") %>%
  mutate(year = 2020) %>%
  right_join(relation_2020, by = "VICTIM_ID")

total_data_2020 <- victim_data_2020 %>%
  left_join(offense_data_2020, by = c("INCIDENT_ID", "year")) %>%
  select(RELATIONSHIP_NAME, OFFENSE_CATEGORY_NAME, year)

## 2021 and 2022 use lower-case column names and join offense types on
## "offense_code". The three kept columns are renamed to the upper-case names
## of the earlier years while selecting, so all years can be stacked.

## 2021
offense_2021 <- read.csv("data/DC-2021/NIBRS_OFFENSE_TYPE.csv")
offense_data_2021 <- read.csv("data/DC-2021/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2021) %>%
  left_join(offense_2021, by = "offense_code")

relationship_2021 <- read.csv("data/DC-2021/NIBRS_RELATIONSHIP.csv")
relation_2021 <- read.csv("data/DC-2021/NIBRS_VICTIM_OFFENDER_REL.csv") %>%
  left_join(relationship_2021, by = "relationship_id")

victim_data_2021 <- read.csv("data/DC-2021/NIBRS_VICTIM.csv") %>%
  mutate(year = 2021) %>%
  right_join(relation_2021, by = "victim_id")

total_data_2021 <- victim_data_2021 %>%
  left_join(offense_data_2021, by = c("incident_id", "year")) %>%
  select(
    RELATIONSHIP_NAME = relationship_name,
    OFFENSE_CATEGORY_NAME = offense_category_name,
    year
  )

## 2022
offense_2022 <- read.csv("data/DC-2022/NIBRS_OFFENSE_TYPE.csv")
offense_data_2022 <- read.csv("data/DC-2022/NIBRS_OFFENSE.csv") %>%
  mutate(year = 2022) %>%
  left_join(offense_2022, by = "offense_code")

relationship_2022 <- read.csv("data/DC-2022/NIBRS_RELATIONSHIP.csv")
relation_2022 <- read.csv("data/DC-2022/NIBRS_VICTIM_OFFENDER_REL.csv") %>%
  left_join(relationship_2022, by = "relationship_id")

victim_data_2022 <- read.csv("data/DC-2022/NIBRS_VICTIM.csv") %>%
  mutate(year = 2022) %>%
  right_join(relation_2022, by = "victim_id")

total_data_2022 <- victim_data_2022 %>%
  left_join(offense_data_2022, by = c("incident_id", "year")) %>%
  select(
    RELATIONSHIP_NAME = relationship_name,
    OFFENSE_CATEGORY_NAME = offense_category_name,
    year
  )

## Stack all five years
total_data_relation <- rbind(
  total_data_2018,
  total_data_2019,
  total_data_2020,
  total_data_2021,
  total_data_2022
)

## Distinct relationship names, in factor-level order
relationships <- levels(factor(total_data_relation$RELATIONSHIP_NAME))

## Positions within `relationships` that make up each relationship group.
## NOTE(review): these are positional indices into the level order above, so
## they presumably need revisiting if the set of relationship names changes.
family_relationships_index <- c(6, 14, 15, 16, 19, 21, 22)
partner_relationships_index <- c(1, 5, 7, 8, 11, 12, 23, 24, 25, 26)
acquaintance_relationships_index <- c(3, 4, 9, 10, 13, 17, 18, 20)
stranger_relationships_index <- 27
other_relationships_index <- 2

## Relationship names belonging to each group
family_relationships <- relationships[family_relationships_index]
partner_relationships <- relationships[partner_relationships_index]
acquaintance_relationships <- relationships[acquaintance_relationships_index]
stranger_relationships <- relationships[stranger_relationships_index]
other_relationships <- relationships[other_relationships_index]

## Map relationship names to their relationship group.
##
## `value` is a vector of relationship names. Each element is looked up in
## family_relationships, partner_relationships, acquaintance_relationships and
## stranger_relationships, in that priority order; anything else (including
## NA) becomes "Other". Returns a character vector the same length as `value`.
##
## The function works on whole vectors and returns its result visibly, so it
## can be applied to a column directly; wrapping it in Vectorize() still works.
relation_checker <- function(value) {
  group <- rep("Other", length(value))
  # Assigned from lowest to highest priority so that an earlier group wins
  # when a name is listed in more than one set
  group[value %in% stranger_relationships] <- "Stranger"
  group[value %in% acquaintance_relationships] <- "Acquaintance"
  group[value %in% partner_relationships] <- "Partner/Partners Family"
  group[value %in% family_relationships] <- "Family"
  group
}

## Wrap the checker so it is applied element by element
relation_checker <- Vectorize(relation_checker)

## Label every row with its relationship group and drop the "Other" group
total_data_relation <- total_data_relation %>%
  mutate(Relation_group = relation_checker(RELATIONSHIP_NAME)) %>%
  filter(Relation_group != "Other")
#total_data_relation$Relation_group %>% table()

library(tidyverse)
library(viridis)
library(plotly)
library(heatmaply)

## Count rows per (relationship group, offense category) and spread the
## groups into columns: one row per offense category, one column per group.
## count() returns an ungrouped result, so spread() is not handed a tibble
## that is still grouped by the key column.
mat <- total_data_relation %>%
  count(Relation_group, OFFENSE_CATEGORY_NAME) %>%
  spread(Relation_group, n) %>%
  as.data.frame()

## Combinations that never occur count as zero
mat[is.na(mat)] <- 0

## Move the offense category into the row names so only counts remain
rownames(mat) <- mat$OFFENSE_CATEGORY_NAME
mat$OFFENSE_CATEGORY_NAME <- NULL

## Heatmap code (hover label spelling corrected: "Prevalence")
ptotal <- heatmaply(mat,
                    label_names = c("Crime Group", "Relation", "Relation Prevalence"),
                    width  = 800, height = 800,
                    dendrogram = FALSE,
                    limits = c(0,10000),
                    scale = "row",
                    branches_lwd = 0.1,
                    hide_colorbar = TRUE,
                    grid_color = "white",
                    grid_width = 0.00001,
                    dend_hoverinfo = FALSE,
                    main = "Heatmap of offense category by relationship between victim and offender")
ptotal

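Fig 4: Heatmap of offense category counts by victim–offender relationship group (Family, Partner/Partner's Family, Acquaintance, Stranger), scaled within each offense category.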

View 5

View 6

Code
library(tidyverse)
library(networkD3)
library(htmlwidgets)
library(htmltools)

# Read the 2018 tables, keeping only the id columns needed for linking
# (selected by position)
offense_18 <- read.csv("./data/DC-2018/NIBRS_OFFENSE.csv") %>% select(2:4)
offender_18 <- read.csv("./data/DC-2018/NIBRS_OFFENDER.csv") %>% select(2:3)
victim_18 <- read.csv("./data/DC-2018/NIBRS_VICTIM.csv") %>% select(2:3)
weapon_18 <- read.csv("./data/DC-2018/NIBRS_WEAPON.csv") %>% select(2:3)
injury_18 <- read.csv("./data/DC-2018/NIBRS_VICTIM_INJURY.csv") %>% select(2:3)

# Lookup tables that later supply the node labels; only the first three
# columns of the offense-type table are kept
offense_code <- read.csv("./data/DC-2018/NIBRS_OFFENSE_TYPE.csv") %>% select(1:3)
injury_code <- read.csv("./data/DC-2018/NIBRS_INJURY.csv")
weapon_code <- read.csv("./data/DC-2018/NIBRS_WEAPON_TYPE.csv")

# Attach the offense-type details via OFFENSE_TYPE_ID, then keep columns 2-4
# of the merged table
offense_18 <- merge(offense_18, offense_code, by = "OFFENSE_TYPE_ID") %>%
  select(2:4)

# Chain the tables together: offenders and victims by incident, injuries by
# victim, weapons by offense; then drop columns 1, 2, 3 and 5 of the result
df_18 <- offense_18 %>%
  merge(offender_18, by = "INCIDENT_ID") %>%
  merge(victim_18, by = "INCIDENT_ID") %>%
  merge(injury_18, by = "VICTIM_ID") %>%
  merge(weapon_18, by = "OFFENSE_ID") %>%
  select(-c(1, 2, 3, 5))

# Lower-case the column names
colnames(df_18) <- tolower(colnames(df_18))

# Prefix injury and weapon ids so they cannot collide with each other
df_18$injury_id <- paste0("i", df_18$injury_id)
df_18$weapon_id <- paste0("w", df_18$weapon_id)

# Offense -> weapon links: number of rows per combination, keeping those with
# more than 100 cases, largest first
first_link <- df_18 %>%
  count(offense_code, weapon_id, name = "value") %>%
  filter(value > 100) %>%
  arrange(desc(value)) %>%
  select(source = offense_code, target = weapon_id, value)

# Weapon -> injury links, built the same way
second_link <- df_18 %>%
  count(weapon_id, injury_id, name = "value") %>%
  filter(value > 100) %>%
  arrange(desc(value)) %>%
  select(source = weapon_id, target = injury_id, value)

# Stack both sets of links into one table
links.df <- as.data.frame(rbind(first_link, second_link))

# Reduce each lookup table to a (name, label) pair: `name` is the code used in
# the links, `label` is the text shown on the node
offense_code <- offense_code %>%
    select(2,3) %>%
    rename(name = OFFENSE_CODE, label = OFFENSE_NAME)
injury_code <- injury_code %>%
    select(1,3) %>%
    rename(name = INJURY_ID, label = INJURY_NAME)
weapon_code <- weapon_code %>%
    select(1,3) %>%
    rename(name = WEAPON_ID, label = WEAPON_NAME)

# Apply the same prefixes that were added to the ids in the link table
injury_code$name <- paste0("i", injury_code$name)
weapon_code$name <- paste0("w", weapon_code$name)

# Combine all the nodes
nodes.df <- rbind(offense_code, injury_code, weapon_code)

# Keep every node that appears at either end of any link. This includes
# weapons that occur only as the source of a weapon -> injury link; leaving
# them out would make match() below return NA for that link's source.
linked_nodes <- unique(c(links.df$source, links.df$target))
nodes.df <- nodes.df %>% filter(name %in% linked_nodes)

# sankeyNetwork() refers to nodes by zero-based row position in nodes.df
links.df$source_id <- match(links.df$source, nodes.df$name) - 1
links.df$target_id <- match(links.df$target, nodes.df$name) - 1

# Create a sankey diagram
net <- sankeyNetwork(Links = links.df,
              Nodes = nodes.df,
              Source = 'source_id',
              Target = 'target_id',
              Value = 'value',
              NodeID = 'label',
              fontSize = 16,
              iterations = 0)

# Add a bold title above the widget
net_with_title <- prependContent(net, tags$b(HTML('Injuries and weapon type by offense type in 2018')))
net_with_title
Injuries and weapon type by offense type in 2018

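The Sankey diagram above links offense types to weapon types and weapon types to injury types for 2018; the diagram below shows the same flows for 2022.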

Code
library(tidyverse)
library(networkD3)
library(htmlwidgets)
library(htmltools)

# Read the 2022 tables, keeping only the id columns needed for linking
# (selected by position)
offense_22 <- read.csv("./data/DC-2022/NIBRS_OFFENSE.csv") %>% select(2:4)
offender_22 <- read.csv("./data/DC-2022/NIBRS_OFFENDER.csv") %>% select(2:3)
victim_22 <- read.csv("./data/DC-2022/NIBRS_VICTIM.csv") %>% select(2:3)
weapon_22 <- read.csv("./data/DC-2022/NIBRS_WEAPON.csv") %>% select(2:3)
injury_22 <- read.csv("./data/DC-2022/NIBRS_VICTIM_INJURY.csv") %>% select(2:3)

# Chain the tables together: offenders and victims by incident, injuries by
# victim, weapons by offense; then drop columns 1, 2, 3 and 5 of the result
df_22 <- offense_22 %>%
  merge(offender_22, by = "incident_id") %>%
  merge(victim_22, by = "incident_id") %>%
  merge(injury_22, by = "victim_id") %>%
  merge(weapon_22, by = "offense_id") %>%
  select(-c(1, 2, 3, 5))

# Prefix injury and weapon ids so they cannot collide with each other
df_22$injury_id <- paste0("i", df_22$injury_id)
df_22$weapon_id <- paste0("w", df_22$weapon_id)

# Offense -> weapon links: number of rows per combination, keeping those with
# more than 100 cases, largest first
first_link <- df_22 %>%
  count(offense_code, weapon_id, name = "value") %>%
  filter(value > 100) %>%
  arrange(desc(value)) %>%
  select(source = offense_code, target = weapon_id, value)

# Weapon -> injury links, built the same way
second_link <- df_22 %>%
  count(weapon_id, injury_id, name = "value") %>%
  filter(value > 100) %>%
  arrange(desc(value)) %>%
  select(source = weapon_id, target = injury_id, value)

# Stack both sets of links into one table
links.df <- as.data.frame(rbind(first_link, second_link))

# Read codes files for nodes
offense_code <- read.csv("./data/DC-2022/NIBRS_OFFENSE_TYPE.csv")
injury_code <- read.csv("./data/DC-2022/NIBRS_INJURY.csv")
weapon_code <- read.csv("./data/DC-2022/NIBRS_WEAPON_TYPE.csv")

# Reduce each lookup table to a (name, label) pair: `name` is the code used in
# the links, `label` is the text shown on the node
offense_code <- offense_code %>%
    select(1,2) %>%
    rename(name = offense_code, label = offense_name)
injury_code <- injury_code %>%
    select(1,3) %>%
    rename(name = injury_id, label = injury_name)
weapon_code <- weapon_code %>%
    select(1,3) %>%
    rename(name = weapon_id, label = weapon_name)

# Apply the same prefixes that were added to the ids in the link table
injury_code$name <- paste0("i", injury_code$name)
weapon_code$name <- paste0("w", weapon_code$name)

# Combine all the nodes
nodes.df <- rbind(offense_code, injury_code, weapon_code)

# Keep every node that appears at either end of any link. This includes
# weapons that occur only as the source of a weapon -> injury link; leaving
# them out would make match() below return NA for that link's source.
linked_nodes <- unique(c(links.df$source, links.df$target))
nodes.df <- nodes.df %>% filter(name %in% linked_nodes)

# sankeyNetwork() refers to nodes by zero-based row position in nodes.df
links.df$source_id <- match(links.df$source, nodes.df$name) - 1
links.df$target_id <- match(links.df$target, nodes.df$name) - 1

# Create a sankey diagram
net <- sankeyNetwork(Links = links.df,
              Nodes = nodes.df,
              Source = 'source_id',
              Target = 'target_id',
              Value = 'value',
              NodeID = 'label',
              fontSize = 16,
              iterations = 0)

# Add a bold title above the widget
net_with_title <- prependContent(net, tags$b(HTML('Injuries and weapon type by offense type in 2022')))
net_with_title
Injuries and weapon type by offense type in 2022

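Fig 5: Sankey diagrams linking offense types to weapon types and weapon types to injury types for 2018 and 2022. Only combinations with more than 100 cases are shown.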

Conclusion